001    /*
002     * RandomSequenceGenerator.java
003     *
004     * Copyright 2003 Sergio Anibal de Carvalho Junior
005     *
006     * This file is part of NeoBio.
007     *
008     * NeoBio is free software; you can redistribute it and/or modify it under the terms of
009     * the GNU General Public License as published by the Free Software Foundation; either
010     * version 2 of the License, or (at your option) any later version.
011     *
012     * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
013     * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
014     * PURPOSE. See the GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License along with NeoBio;
017     * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018     * Boston, MA 02111-1307, USA.
019     *
020     * Proper attribution of the author as the source of the software would be appreciated.
021     *
022     * Sergio Anibal de Carvalho Junior             mailto:sergioanibaljr@users.sourceforge.net
023     * Department of Computer Science               http://www.dcs.kcl.ac.uk
024     * King's College London, UK                    http://www.kcl.ac.uk
025     *
026     * Please visit http://neobio.sourceforge.net
027     *
028     * This project was supervised by Professor Maxime Crochemore.
029     *
030     */
031    
032    package neobio.textui;
033    
034    import java.io.BufferedWriter;
035    import java.io.Writer;
036    import java.io.FileWriter;
037    import java.io.OutputStreamWriter;
038    import java.io.IOException;
039    
/**
 * This class is a simple command line based utility for generating random sequences.
 *
 * <P>The main method takes up to three parameters from the command line to generate a
 * sequence: <CODE>type</CODE>, <CODE>size</CODE> and <CODE>file</CODE>, where:
 * <UL>
 * <LI><B><CODE>type</CODE></B> is either <CODE>DNA</CODE> for DNA sequences or
 * <CODE>PROT</CODE> for protein sequences (case is ignored).
 * <LI><B><CODE>size</CODE></B> is the number of characters (must be at least 1).
 * <LI><B><CODE>file</CODE></B> (optional) is the name of a file (if omitted, the
 * sequence is written to standard output).
 * </UL>
 * </P>
 *
 * <P>The process exits with status 1 when the arguments are invalid and with status 2
 * when the output cannot be opened or written.</P>
 *
 * @author Sergio A. de Carvalho Jr.
 */
public class RandomSequenceGenerator
{
    /**
     * Character set for DNA sequences.
     */
    private static final char[] DNA_CHARS = {'A', 'C', 'G', 'T'};

    /**
     * Character set for protein sequences.
     */
    private static final char[] PROT_CHARS = {'A','R','N','D','C','Q','E','G','H','I',
                                              'L','K','M','F','P','S','T','W','Y','V',
                                              'B','Z','X'};

    /**
     * The main method takes up to three parameters from the command line to generate a
     * sequence. See the class description for details.
     *
     * <P>On invalid arguments the usage text (or a specific error message) is printed
     * to standard error and the JVM is terminated with status 1. If the output file
     * cannot be opened or written, the JVM is terminated with status 2. On success the
     * method simply returns, so it can also be invoked from other code without
     * shutting down the caller's JVM.</P>
     *
     * @param args command line arguments: <CODE>type</CODE>, <CODE>size</CODE> and an
     * optional <CODE>file</CODE>
     */
    public static void main (String args[])
    {
        // <type> and <size> are required; <file> is optional
        if (args.length < 2)
        {
            usage();
            System.exit(1);
            return;
        }

        // 1st argument (required): sequence type
        char[] charset = charsetFor (args[0]);
        if (charset == null)
        {
            // no such option
            usage();
            System.exit(1);
            return;
        }

        // 2nd argument (required): number of characters
        int size;
        try
        {
            size = Integer.parseInt (args[1]);
        }
        catch (NumberFormatException e)
        {
            usage();
            System.exit(1);
            return;
        }

        if (size < 1)
        {
            System.err.println ("Error: size must be at least 1.");
            System.exit(1);
            return;
        }

        // 3rd argument (optional): file name; null means standard output
        String filename = (args.length > 2) ? args[2] : null;
        Writer output;

        if (filename != null)
        {
            try
            {
                // open file for writing
                output = new BufferedWriter (new FileWriter (filename));
            }
            catch (IOException e)
            {
                System.err.println ("Error: couldn't open " + filename + " for writing.");
                e.printStackTrace();
                System.exit(2);
                return;
            }
        }
        else
        {
            output = new OutputStreamWriter (System.out);
        }

        try
        {
            try
            {
                writeSequence (output, charset, size);
                output.flush();
            }
            finally
            {
                // release the file even if writing failed; standard output is
                // deliberately left open since this method does not own it
                if (filename != null) output.close();
            }
        }
        catch (IOException e)
        {
            System.err.println ("Error: failed to write sequence.");
            e.printStackTrace();
            System.exit(2);
            return;
        }
    }

    /**
     * Selects the character set that corresponds to a sequence type name.
     *
     * @param type the sequence type given on the command line
     * @return the DNA character set for <CODE>DNA</CODE>, the protein character set
     * for <CODE>PROT</CODE> (both compared ignoring case), or <CODE>null</CODE> if the
     * type is not recognised
     */
    private static char[] charsetFor (String type)
    {
        if (type.equalsIgnoreCase ("DNA"))
            return DNA_CHARS;

        if (type.equalsIgnoreCase ("PROT"))
            return PROT_CHARS;

        return null;
    }

    /**
     * Writes <CODE>size</CODE> characters to the given writer, each one picked
     * independently with <CODE>Math.random()</CODE> from the supplied character set.
     * The writer is neither flushed nor closed.
     *
     * @param output destination of the sequence
     * @param charset characters to choose from
     * @param size number of characters to write
     * @throws IOException if the writer fails
     */
    private static void writeSequence (Writer output, char[] charset, int size)
        throws IOException
    {
        for (int i = 0; i < size; i++)
        {
            // Math.random() is in [0, 1), so the index is always within bounds
            int random = (int) (Math.random() * charset.length);

            output.write (charset[random]);
        }
    }

    /**
     * Prints command line usage to standard error.
     */
    private static void usage ()
    {
        System.err.println(
        "\nUsage: RandomSequenceGenerator <type> <size> [<file>]\n\n" +
        "where:\n\n" +
        "   <type> = DNA for nucleotide sequences\n" +
        "         or PROT for protein sequences\n\n" +
        "   <size> = number os characters\n\n" +
        "   <file> = name of a file to where the sequence is to be written\n" +
        "            (if ommited, sequence is written to standard output)"
        );
    }
}